# -*- coding:utf-8 -*-
# @Date      :2021/5/1
# @Author    :Maoxian

import random
import time
from queue import Empty, Queue
from threading import Thread

import requests
from lxml import etree


# 从IP队列中获取IP地址，并验证是否有效
# Worker: drain the shared proxy queue and keep the proxies that work.
def validate_worker():
    """Validate proxies from the module-level queue ``q``.

    Each queue item is a ``proxies`` dict for ``requests``. A proxy that can
    fetch the test URL with a 2xx/3xx status is appended (one per line) to
    ``ip.txt``. Returns when the queue is empty.
    """
    # Public echo endpoint used purely to test connectivity through the proxy;
    # loop-invariant, so it is hoisted out of the loop.
    url = 'http://httpbin.org/get'
    while True:
        # get_nowait() is atomic. The original empty()-then-get() pair could
        # race between the 5 worker threads: another thread may drain the
        # queue between the two calls, leaving get() blocked forever.
        try:
            proxies = q.get_nowait()
        except Empty:
            break
        try:
            resp = requests.get(url, headers=headers, proxies=proxies, timeout=10)
            # A proxy that answers with an HTTP error status is not usable
            # either; treat 4xx/5xx the same as a connection failure.
            resp.raise_for_status()
            print('没有报错，ip可以用')
            # NOTE(review): several worker threads append concurrently; each
            # proxy is written as one short line in append mode, which is
            # effectively atomic on common platforms — confirm if this ever
            # moves to larger records.
            with open('ip.txt', 'a') as f:
                f.write(str(proxies))
                f.write('\n')
        except requests.exceptions.RequestException:
            # Narrowed from a bare ``except:`` which also swallowed
            # KeyboardInterrupt and programming errors.
            print('ip不可用  下一个')


# 使用多线程验证代理是否可用
# Fan out proxy validation across a small pool of threads.
def validat():
    """Start 5 validator threads and block until the queue is drained.

    Each thread runs :func:`validate_worker`; joining all of them means every
    queued proxy has been checked before this function returns.
    """
    workers = [Thread(target=validate_worker) for _ in range(5)]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()


# Shared work queue: the scraper loop below produces proxy dicts, the
# validate_worker threads consume them.
q = Queue()

# Request headers are loop-invariant; build them once. This must stay a
# module-level global because validate_worker reads it.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/63.0.3239.132 Safari/537.36',
}

# Scrape 5 pages of free proxies (pages 1-5).
# (The original comment claimed 10 pages, but range(1, 6) crawls 5.)
for i in range(1, 6):
    url = 'https://www.kuaidaili.com/free/inha/{}'.format(i)
    print(url)
    # timeout added so one stalled page request cannot hang the whole run.
    html = requests.get(url, headers=headers, timeout=10).text
    parse_html = etree.HTML(html)
    tr_list = parse_html.xpath('//*[@id="list"]/table/tbody/tr')
    # Random 6-11 second delay between pages to avoid hammering the site.
    sleep = random.randint(6, 11)
    print(f'等待{sleep}秒')
    time.sleep(sleep)
    print('开始')
    # tr_list[1:] skips the first row — presumably a header row; TODO confirm
    # against the live page markup.
    for tr in tr_list[1:]:
        ip_cells = tr.xpath('./td[1]/text()')
        port_cells = tr.xpath('./td[2]/text()')
        # Guard against malformed rows: bare [0] would raise IndexError and
        # abort the whole crawl on a single bad row.
        if not ip_cells or not port_cells:
            continue
        ip = ip_cells[0]
        port = port_cells[0]
        proxies = {
            'http': f'http://{ip}:{port}',
            'https': f'https://{ip}:{port}',
        }
        print(proxies)
        # Hand the candidate proxy to the validator threads.
        q.put(proxies)
    # Validate this page's proxies with the thread pool before moving on.
    validat()
